********************************************************************************
* 03_create_descriptive_tables.do
* Generate Descriptive Tables 1, 2, and 3 for JHR Paper
*
* Inputs:
*   - data/cleaned/ets_treatment_data.dta
*   - data/cleaned/enrollment_event_data.xlsx
*   - data/cleaned/graduation_event_data.xlsx
*   - data/raw/ets/cleaned ppst and core and composite.dta
*
* Outputs:
*   - output/tables/table_1_state_info.xlsx
*   - output/tables/table_2_summary_statistics.xlsx
*   - output/tables/table_3_ppst_zscores.xlsx
********************************************************************************

clear all
set more off

display "========================================"
display "03: Creating Descriptive Tables 1, 2, 3"
display "========================================"

capture mkdir "output"
capture mkdir "output/tables"

* ══════════════════════════════════════════════════════════════════════════════
* TABLE 1: State Descriptive Information
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Table 1: State Descriptive Information..."

clear
set obs 30

* 30 states that ever used Praxis tests
gen State = ""
local states `" "AK" "AR" "CT" "DC" "DE" "HI" "IN" "IA" "KY" "LA" "ME" "MD" "MS" "NE" "NV" "NH" "NJ" "NC" "ND" "OK" "OR" "PA" "RI" "SC" "TN" "VT" "VA" "WA" "WV" "WI" "'
forvalues i = 1/30 {
    local s : word `i' of `states'
    replace State = "`s'" in `i'
}

* PPST usage
gen PPST = "yes"
foreach st in "IA" "KY" "RI" "WA" {
    replace PPST = "no" if State == "`st'"
}

* Core adoption date
gen Core_Adoption = ""
replace Core_Adoption = "Fall 2014" if inlist(State, "AK", "CT", "KY", "LA", "MD", "NE", "ND", "PA", "WA")
replace Core_Adoption = "Fall 2014" if State == "WI"
replace Core_Adoption = "Fall 2013" if inlist(State, "AR", "DC", "HI", "ME", "MS", "NC", "NH")
replace Core_Adoption = "Fall 2013" if inlist(State, "NJ", "SC", "TN", "VT", "VA", "WV")
replace Core_Adoption = "Spring 2014" if inlist(State, "DE", "OR")
replace Core_Adoption = "Fall 2013" if State == "NV"
replace Core_Adoption = "+" if State == "OK"

* Adopted recommended pass scores
gen Adopted_Scores = "no"
foreach st in "AK" "AR" "DC" "DE" "HI" "LA" "MD" "MS" "NE" "NV" "NH" "NJ" "NC" "OR" "VA" "VT" "WV" "WI" "KY" "RI" {
    replace Adopted_Scores = "yes" if State == "`st'"
}
replace Adopted_Scores = "yes*" if inlist(State, "CT", "ME", "PA", "SC")
replace Adopted_Scores = "no*" if State == "ND"

* Panel participant
gen Panel_Participant = "no"
foreach st in "AK" "AR" "CT" "DC" "DE" "HI" "KY" "LA" "ME" "MD" "MS" "NE" "NH" "NJ" "NC" "ND" "PA" "RI" "SC" "TN" "VT" "WV" "WI" {
    replace Panel_Participant = "yes" if State == "`st'"
}

* In sample
gen In_Sample = "no"
foreach st in "AK" "AR" "CT" "DC" "DE" "HI" "LA" "ME" "MD" "MS" "NC" "NE" "NV" "NH" "NJ" "OR" "PA" "SC" "VA" "VT" "WV" "WI" {
    replace In_Sample = "yes" if State == "`st'"
}

* Shrinking state
* Note: DC is included here to match the published paper's Table 1.
* The regression code (06_main_regressions) excludes DC from shrinking
* subsamples because pop_percentage_change > 0 for DC in the regression data.
gen Shrinking_State = "n/a"
foreach st in "CT" "DC" "LA" "ME" "MS" "NH" "NJ" "PA" "VT" "WV" "WI" {
    replace Shrinking_State = "yes" if State == "`st'"
}
foreach st in "AK" "AR" "DE" "HI" "NC" "NE" "NV" "OR" "SC" "VA" {
    replace Shrinking_State = "no" if State == "`st'"
}
replace Shrinking_State = "no" if State == "MD"

export excel using "output/tables/table_1_state_info.xlsx", firstrow(variables) replace
display "  Saved: table_1_state_info.xlsx"

* ══════════════════════════════════════════════════════════════════════════════
* TABLE 3: Testing Standards (PPST z-scores and Delta TDI)
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Table 3: Testing Standards..."

* Load raw ETS data
use "data/raw/ets/cleaned ppst and core and composite.dta", clear

* Import and append additional states
tempfile main_ets
save `main_ets'

import excel "data/raw/ets/states_we_add_back_in.xlsx", firstrow clear
tostring ID, replace
gen z_score = (passingscore - test_mean) / test_sd
append using `main_ets'

* Get PPST data for 2012 (last PPST year), time == "old"
keep if year == 2012 & time == "old"
keep State subject passingscore test_mean test_sd z_score

* Rename subjects for consistency
replace subject = "reading" if subject == "read"
replace subject = "writing" if subject == "write"

* Composite states: subtract 3 from raw passing scores
gen is_composite = inlist(State, "HI", "MD", "ME", "NH", "PA", "VA", "VT")
gen passingscore_adj = passingscore
replace passingscore_adj = passingscore - 3 if is_composite == 1
gen z_score_adj = (passingscore_adj - test_mean) / test_sd

* Reshape to wide format (one row per state)
keep State subject passingscore_adj z_score_adj
reshape wide passingscore_adj z_score_adj, i(State) j(subject) string

* Calculate PPST TDI (average of 3 adjusted z-scores)
gen TDI_PPST = (z_score_adjmath + z_score_adjreading + z_score_adjwriting) / 3

* Now get Praxis Core 2014 TDI for Delta computation
* Reload raw data
tempfile ppst_wide
save `ppst_wide'

use "data/raw/ets/cleaned ppst and core and composite.dta", clear
tempfile main_ets2
save `main_ets2'
import excel "data/raw/ets/states_we_add_back_in.xlsx", firstrow clear
tostring ID, replace
gen z_score = (passingscore - test_mean) / test_sd
append using `main_ets2'

keep if year == 2014 & time == "new"
gen z_core = (passingscore - test_mean) / test_sd
collapse (mean) z_core, by(State)
rename z_core TDI_Core

* Merge PPST and Core TDI
merge 1:1 State using `ppst_wide'
keep if _merge == 3
drop _merge

* Compute Delta TDI
gen Delta_TDI = TDI_Core - TDI_PPST

* Filter to sample states
gen in_sample = inlist(State, "AK", "AR", "CT", "DC", "DE", "HI", "LA", "ME") | ///
                inlist(State, "MD", "MS", "NC", "NE", "NV", "NH", "NJ", "OR") | ///
                inlist(State, "PA", "SC", "VA", "VT", "WV", "WI")
keep if in_sample == 1
drop in_sample

* Mark composite states
gen is_composite = inlist(State, "HI", "MD", "ME", "NH", "PA", "VA", "VT")
replace State = State + "*" if is_composite == 1
drop is_composite

* Round numeric columns
foreach var of varlist passingscore_adj* z_score_adj* TDI_PPST TDI_Core Delta_TDI {
    replace `var' = round(`var', 0.01)
}

* Rename for export
rename passingscore_adjmath Math_Raw_Score
rename z_score_adjmath Math_Z_Score
rename passingscore_adjreading Reading_Raw_Score
rename z_score_adjreading Reading_Z_Score
rename passingscore_adjwriting Writing_Raw_Score
rename z_score_adjwriting Writing_Z_Score
rename TDI_PPST TDI
rename Delta_TDI Delta_TDI_val

order State Math_Raw_Score Math_Z_Score Reading_Raw_Score Reading_Z_Score ///
      Writing_Raw_Score Writing_Z_Score TDI Delta_TDI_val

sort State

export excel using "output/tables/table_3_ppst_zscores.xlsx", firstrow(variables) replace
display "  Saved: table_3_ppst_zscores.xlsx"

* ══════════════════════════════════════════════════════════════════════════════
* TABLE 2: Summary Statistics
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "Creating Table 2: Summary Statistics..."

* Check if enrollment data exists
capture confirm file "data/cleaned/enrollment_event_data.xlsx"
if _rc != 0 {
    display "  Warning: enrollment_event_data.xlsx not found. Skipping Table 2."
    display "  Run IPEDS cleaning scripts first."
}
else {
    * Load enrollment data
    import excel "data/cleaned/enrollment_event_data.xlsx", firstrow clear
    drop if State == "ND" | State == "TN"

    * Compute selectivity using median SAT/ACT from 2010 only
    * Per paper Table 2 note (p37): "More selective universities are defined as
    * those with SAT/ACT scores above the in-sample median as of 2010."
    * Uses strictly greater (>) to match 06_main_regressions definition.
    quietly summarize satvr25_2 if year == 2010, detail
    local sat_med = r(p50)
    quietly summarize actcm25_2 if year == 2010, detail
    local act_med = r(p50)

    gen selective = "Less Selective"
    replace selective = "More Selective" if satvr25_2 > `sat_med' & satvr25_2 != .
    replace selective = "More Selective" if actcm25_2 > `act_med' & actcm25_2 != . & selective == "Less Selective"

    * Panel A: Outcome means and SDs
    * Compute summary stats for key variables
    putexcel set "output/tables/table_2_summary_statistics.xlsx", replace

    * Header row
    putexcel A1 = "Variable" B1 = "All" C1 = "More Selective" D1 = "Less Selective"
    putexcel A2 = "Panel A: Outcomes"

    local row = 3
    foreach var in eftotlt efwhitt non_white {
        if "`var'" == "eftotlt" local label "Education fall enrollments"
        if "`var'" == "efwhitt" local label "White education fall enrollments"
        if "`var'" == "non_white" local label "Non-white education fall enrollments"

        * All
        quietly summarize `var'
        local mean_all = round(r(mean), 1)
        local sd_all = round(r(sd), 1)
        * More selective
        quietly summarize `var' if selective == "More Selective"
        local mean_ms = round(r(mean), 1)
        local sd_ms = round(r(sd), 1)
        * Less selective
        quietly summarize `var' if selective == "Less Selective"
        local mean_ls = round(r(mean), 1)
        local sd_ls = round(r(sd), 1)

        putexcel A`row' = "`label'" B`row' = `mean_all' C`row' = `mean_ms' D`row' = `mean_ls'
        local row = `row' + 1
        putexcel A`row' = "" B`row' = "(`sd_all')" C`row' = "(`sd_ms')" D`row' = "(`sd_ls')"
        local row = `row' + 1
    }

    * Panel B: Controls
    local row = `row' + 1
    putexcel A`row' = "Panel B: Time-varying Controls"
    local row = `row' + 1

    foreach var in enrollment_total2 satpct2 actpct2 satmt25_2 satvr25_2 actcm25_2 pell_amount2 pell_percent2 loan_average2 loan_percent2 {
        if "`var'" == "enrollment_total2" local label "Total freshmen enrollment"
        if "`var'" == "satpct2" local label "SAT submission rate"
        if "`var'" == "actpct2" local label "ACT submission rate"
        if "`var'" == "satmt25_2" local label "SAT math 25th percentile"
        if "`var'" == "satvr25_2" local label "SAT verbal 25th percentile"
        if "`var'" == "actcm25_2" local label "ACT composite 25th percentile"
        if "`var'" == "pell_amount2" local label "Pell grant (dollar amount)"
        if "`var'" == "pell_percent2" local label "Pell grant (% students)"
        if "`var'" == "loan_average2" local label "Loan (dollar amount)"
        if "`var'" == "loan_percent2" local label "Loan (% students)"

        quietly summarize `var'
        local mean_all = round(r(mean), 1)
        local sd_all = round(r(sd), 1)
        quietly summarize `var' if selective == "More Selective"
        local mean_ms = round(r(mean), 1)
        local sd_ms = round(r(sd), 1)
        quietly summarize `var' if selective == "Less Selective"
        local mean_ls = round(r(mean), 1)
        local sd_ls = round(r(sd), 1)

        putexcel A`row' = "`label'" B`row' = `mean_all' C`row' = `mean_ms' D`row' = `mean_ls'
        local row = `row' + 1
        putexcel A`row' = "" B`row' = "(`sd_all')" C`row' = "(`sd_ms')" D`row' = "(`sd_ls')"
        local row = `row' + 1
    }

    * Number of universities
    quietly tab unitid
    local n_all = r(r)
    quietly tab unitid if selective == "More Selective"
    local n_ms = r(r)
    quietly tab unitid if selective == "Less Selective"
    local n_ls = r(r)

    putexcel A`row' = "Number of universities" B`row' = `n_all' C`row' = `n_ms' D`row' = `n_ls'

    putexcel close

    display "  Saved: table_2_summary_statistics.xlsx"
    display "  Total universities: `n_all'"
}

* ──────────────────────────────────────────────────────────────────────────────
* Summary
* ──────────────────────────────────────────────────────────────────────────────

display ""
display "========================================"
display "Descriptive Tables Summary"
display "========================================"
display "Created:"
display "  - Table 1: State Descriptive Information"
display "  - Table 2: Summary Statistics"
display "  - Table 3: Testing Standards (PPST z-scores)"
display ""
display "03_create_descriptive_tables.do complete."
